# Figure_3.R
# (formerly Kalman_divergence_code.R)
#
# 2009 July 21
# John G. Bullock
#
# Creates PDF of Figure 3 in "Partisan Bias and the Bayesian Ideal
# in the Study of Public Opinion," Journal of Politics 71 (July): 
# 1109-24, http://dx.doi.org/10.1017/S0022381609090914.
#
# sessionInfo() output is at the bottom of this file.

if (any(regexpr('Hmisc', search())>-1)) { detach(package:Hmisc) }
library(grid)
library(lattice)
set.seed(1977) # to get different simulation results, change the seed or delete this line


# GENERATE THE DATA
nrows <- 5
ncols <- 4
panel.periods <- rep(6, nrows*ncols) # for how many stages will we see updating in each simulation?
simsperpanel  <- 030                 # number of simulations per panel
mu.0   <- rep(2.0, nrows*ncols)      # starting value for true parameter of interest 
mu.var <- rep(0.0, nrows*ncols)      # variance on error term for true parameter of interest
gamma  <- rep(c(1.05, 1.10, 1.15, 1.20), nrows)
x.var  <- rep(c(1, 5, 10, 15, 20), 1, each=ncols) 
D.prior.means     <- rep(2, nrows*ncols) 
R.prior.means     <- rep(1, nrows*ncols) 
D.prior.variances <- rep(c(1.25, 1.25, 1.25, 1.25), nrows)
R.prior.variances <- rep(c(1.00, 1.00, 1.00, 1.00), nrows)


# KALMAN UPDATING FUNCTION
Kalman.update <- function(  periods
                          , D.prior.mean, D.prior.variance
                          , R.prior.mean, R.prior.variance
                          , mu.0, mu.var
                          , gamma
                          , x.var
                          , include.prior=TRUE # report back the prior parameters in the output, in addition to all posteriors
                          , x.first=NULL       # specify the first message that updaters receive instead of calculating it from other arguments
                          ) {
  periods          <- periods+10
  D.post.means     <- c(D.prior.mean, rep(NA, periods))    
  D.post.variances <- c(D.prior.variance, rep(NA, periods))
  R.post.means     <- c(R.prior.mean, rep(NA, periods))    
  R.post.variances <- c(R.prior.variance, rep(NA, periods))  
  mu.post          <- c(mu.0, rep(NA, periods))       # values of mu, the true param. of interest, at each stage
  x                <- rep(NA, periods+1)              # values of the messages that updaters actually see
  k.D              <- rep(NA, periods+1)
  k.R              <- rep(NA, periods+1)  
  
  # simulate true posterior means, values of messages received (x[1]...x[n]), and more
  for (i in 2:(periods+1)) {
    mu.post[i] <- gamma*mu.post[i-1] + rnorm(1, mean=0, sd=sqrt(mu.var))
    if (!is.null(x.first) && length(x.first)>=i-1) { x[i] <- x.first[i-1] }    
    else { x[i]         <- mu.post[i] + rnorm(1, mean=0, sd=sqrt(x.var)) }
    D.post.variances[i] <- 1/(1/(gamma^2*D.post.variances[i-1] + mu.var) + 1/x.var)
    R.post.variances[i] <- 1/(1/(gamma^2*R.post.variances[i-1] + mu.var) + 1/x.var)    
    k.D[i]              <- D.post.variances[i]/x.var
    k.R[i]              <- R.post.variances[i]/x.var
    D.post.means[i]     <- (1-k.D[i])*gamma*D.post.means[i-1] + k.D[i]*x[i]
    R.post.means[i]     <- (1-k.R[i])*gamma*R.post.means[i-1] + k.R[i]*x[i]    
  }
  gamma <- rep(gamma, periods+1)
  x.var <- rep(x.var, periods+1)
  periods <- 0:periods # just to help myself interpret the output of this function -- R uses 1-based indexing, but I like 0-based indexing
  post <- cbind(periods, D.post.means, R.post.means, D.post.variances, R.post.variances, mu.post, gamma, x, x.var, k.D, k.R)
  if (!include.prior) { post <- post[2:nrow(post), ] }
  return(post)
}


# SIMULATE POSTERIOR MEANS GIVEN THE STARTING VALUES
# Note that the simulations are run (i.e., Kalman.update()
# is called) simsperpanel times for each panel.
# Each call to Kalman.update() simulates over more than
# 10 periods.  In each simulation, I just save the
# results from the first few periods.
D.post.means <- R.post.means <- x <- vector("list", nrows*ncols) 
for (i in 1:(nrows*ncols)) { # for each panel
  D.post.means[[i]] <- R.post.means[[i]] <- x[[i]] <- vector("list", simsperpanel)
  for (j in 1:simsperpanel) {
    tmp <- Kalman.update(  panel.periods[i], D.prior.means[i], D.prior.variances[i], R.prior.means[i], R.prior.variances[i]
                         , mu.0[i], mu.var[i], gamma[i], x.var[i])
    D.post.means[[i]][[j]] <- tmp[1:(panel.periods[i]+1), 'D.post.means']
    R.post.means[[i]][[j]] <- tmp[1:(panel.periods[i]+1), 'R.post.means']
    x[[i]][[j]]            <- tmp[1:(panel.periods[i]+1), 'x']
  }
}


# CREATE THE DATA FRAME
Kalman.divergence.data <- expand.grid(stage=0:panel.periods[1], simulation=1:simsperpanel, panel=1:(nrows*ncols)) # stage 0 gives the priors
Kalman.divergence.data$mu.D <- unlist(D.post.means)
Kalman.divergence.data$mu.R <- unlist(R.post.means)
Kalman.divergence.data$diff <- abs(unlist(D.post.means)-unlist(R.post.means))
Kalman.divergence.data$diff.from.prev <- c(NA, diff(Kalman.divergence.data$diff))
  Kalman.divergence.data$diff.from.prev[Kalman.divergence.data$stage==0] <- NA
Kalman.divergence.data$diff.from.prev.percent <- rep(NA, nrow(Kalman.divergence.data)) # percentage change from previous difference
  for (i in 2:nrow(Kalman.divergence.data)) {
    if (Kalman.divergence.data$stage[i]==0) { next }
    Kalman.divergence.data$diff.from.prev.percent[i] <- 100 *  abs(Kalman.divergence.data$diff.from.prev[i])/Kalman.divergence.data$diff[i-1]
  }
Kalman.divergence.data$var.D.prior <- rep(rep(D.prior.variances, each=panel.periods[1]+1), each=simsperpanel)  
Kalman.divergence.data$var.R.prior <- rep(rep(R.prior.variances, each=panel.periods[1]+1), each=simsperpanel)  
Kalman.divergence.data$x           <- unlist(x)
Kalman.divergence.data$x.var       <- rep(rep(x.var,             each=panel.periods[1]+1), each=simsperpanel)  
Kalman.divergence.data$gamma       <- rep(rep(gamma,             each=panel.periods[1]+1), each=simsperpanel)  


# WHERE DOES DIVERGENCE OCCUR?  [2007 12 28]
# tapply(Kalman.divergence.data$diff, Kalman.divergence.data$panel, max)   # largest within-stage differences
# Kalman.divergence.data$diff[Kalman.divergence.data$stage==panel.periods] # differences at final stage
# tapply(  Kalman.divergence.data$diff[Kalman.divergence.data$stage==panel.periods]
#        , Kalman.divergence.data$panel[Kalman.divergence.data$stage==panel.periods]
#        , mean)                                                           # mean gap at final stage
# tapply(  Kalman.divergence.data$diff[Kalman.divergence.data$stage==panel.periods]
#        , Kalman.divergence.data$panel[Kalman.divergence.data$stage==panel.periods]
#        , median)                                                         # median gap at final stage
# tapply(  Kalman.divergence.data$diff.from.prev
#        , Kalman.divergence.data$panel, function (x) sum(x>0, na.rm=T))   # how many simulations show stage-to-stage divergence?
# tapply(  Kalman.divergence.data$diff.from.prev.percent[Kalman.divergence.data$diff.from.prev>0]
#        , Kalman.divergence.data$panel[Kalman.divergence.data$diff.from.prev>0]
#        , function (x) mean(x, na.rm=T))                                  # when divergence occurs, what is avg. percentage increase in belief gap?
# tapply(  Kalman.divergence.data$diff.from.prev.percent[Kalman.divergence.data$diff.from.prev<0]
#        , Kalman.divergence.data$panel[Kalman.divergence.data$diff.from.prev<0]
#        , function (x) mean(x, na.rm=T))                                  # when convergence occurs, what is avg. percentage increase in belief gap?


# WHERE DOES POLARIZATION OCCUR? [2007 12 23]
# Kalman.divergence.data$D.diff <- c(NA, diff(Kalman.divergence.data$mu.D))
# Kalman.divergence.data$R.diff <- c(NA, diff(Kalman.divergence.data$mu.R))
# Kalman.divergence.data$pol    <- rep(NA, nrow(Kalman.divergence.data))
# for (i in 1:nrow(Kalman.divergence.data)) {
#   if (Kalman.divergence.data$stage[i]==0) { next }
#   if (Kalman.divergence.data$diff.from.prev[i]>0 & Kalman.divergence.data$D.diff[i]/Kalman.divergence.data$R.diff[i]<0) {
#     Kalman.divergence.data$pol[i] <- Kalman.divergence.data$stage[i]
#   }
# }
# which(!is.na(Kalman.divergence.data$pol))
# Kalman.divergence.data[!is.na(Kalman.divergence.data$pol),]
             

# PRELIMINARIES FOR PRINTING THE FIGURE
dir.output       <- '.'                        # where the figure goes
filename.output  <- 'Figure_3.pdf'
PDFtitle         <- 'Divergence and convergence under Bayesian learning about a changing condition'
PSwidth          <-  9.5
PSheight         <- 12.0
postscript.bg    <- 'transparent'
panelwidth      <- list(1.090, "inches")       # these dimensions allow 4 columns on 8.5" paper
panelheight     <- list(0.86875, "inches") 
plotlinealpha   <- .725                        # transparency (1 = opaque)
plotlinecolor   <- rgb(0, 0, 0, plotlinealpha) # first three params are gray (lower=darker)
plotlinewidth   <- 1
panelbordercol  <- 'black'
panelborderwidth <- .3
panelnumbercol   <- grey(.2)
panelLayout      <- c(ncols, nrows)            # columns, then rows
xbetween        <- 1.105                       # space between columns
ybetween        <- 1.3175                      # space between rows
panel.xlim      <- NULL 
panel.ylim      <- c(0, 1.70) 
striptextsize   <-  .75      # cex
xaxtextsize      <-  .8*.87  # cex
yaxtextsize      <-  .8*.87  # cex
axisticksize    <- .4
xaxs            <- list(draw=TRUE, labels=0:panel.periods[1], at=0:panel.periods[1], tck=c(axisticksize, 0), col=panelbordercol, cex=xaxtextsize,
                        alternating=1, relation='free', axs='i') # axs='i' means that there is no padding around the xlimits
yaxs            <- list(  draw=TRUE
                        , labels=c(0, '.5', '1', '1.5', '2'), at=c(0, .5, 1, 1.5, 2)
                        , tck=c(axisticksize, 0)
                        , col=panelbordercol
                        , cex=yaxtextsize
                        , alternating=1
                        , relation='free' # but the ylim will constrain all panels to same height
                        , axs='i'
                        , rot=90 
                        ) 


# FUNCTION TO STRIP BAD CHARACTERS FROM BEGINNING OF STRING
stripinit <- function(x,bad=" ") { # strips bad from beginning of string
  bad <- paste(bad,"*",sep="")
  sub(bad,'',x)
}


# PANEL FUNCTION
Kalman.divergence.panel <- function(...) {
  trellis.par.set("clip", list(panel="off", strip="off")) # for permitting panel.axis(outside=T, ...)

  # plot data
  panel.xyplot(...)

  # per-panel labels and rugs
  trellis.par.set("clip", list(panel="off", strip="off")) # for permitting panel.axis(outside=T, ...)
  grid.text(panel.number(), x=.965, y=.875, just='right', gp=gpar(font=1, cex=.80, col=panelnumbercol)) # lower numbers are darker for grey()  
  grid.text('time',                x= .51,  y=-.33, just='center', gp=gpar(font=1, cex=.72, col=panelbordercol))
  grid.text('difference of means', x=-.250, y= .50, hjust=.5, vjust=.5, gp=gpar(font=1, cex=.705, col=panelbordercol), rot=90)
   
  # labels above each column and beside each row
  # note: expression(rho) produces \sigma
  var.D.prior <- stripinit(Kalman.divergence.data$var.D.prior[Kalman.divergence.data$panel==panel.number()][1], 0)
  var.R.prior <- stripinit(Kalman.divergence.data$var.R.prior[Kalman.divergence.data$panel==panel.number()][1], 0)  
  x.var       <- stripinit(Kalman.divergence.data$x.var[Kalman.divergence.data$panel==panel.number()][1], 0)
  gamma.val   <- stripinit(Kalman.divergence.data$gamma[Kalman.divergence.data$panel==panel.number()][1], 0)  
  if (panel.number()%in%1:4) {
    grid.text(substitute(paste(gamma==gamma.val, sep='')), x=.5, y=1.25, just='center', gp=gpar(font=1, cex=.8, col=panelbordercol))
    if (panel.number()==1)      {
        grid.lines(x=c( .004, 5.320), y=c(1.100,  1.100), gp=gpar(lwd=0.75, col=panelbordercol, lineend='square')) # horizontal line
        grid.lines(x=c(-.400, -.400), y=c( .987, -6.390), gp=gpar(lwd=0.75, col=panelbordercol, lineend='square')) # vertical line
    }    
  }
  if (panel.number()%in%c(1,5,9,13,17)) {
    grid.text(substitute(sigma[x]^2==x.var), x=-.50, y=.5,  vjust=.5, hjust=.5, rot=90, gp=gpar(font=1, cex=.8, col=panelbordercol)) 
  }

  # information at bottom of page
  if (panel.number()==18) { grid.text(paste(Sys.time(), '; mu.0 = ', mu.0[1], '; D.prior = ', D.prior.means[1], '; R.prior = ', R.prior.means[1], '; simsperpanel = ', simsperpanel, '; alpha = ', plotlinealpha, '; lwd = ', plotlinewidth, sep=''), y=-1, just='center', gp=gpar(font=2)) }
}


# MAKE THE PDF FILE
setwd(dir.output)
pdf(file=filename.output, width=PSwidth, height=PSheight, paper="special", onefile=TRUE
    , version='1.4'                                                                            # minimum required for transparency
    , title=PDFtitle
    , bg=postscript.bg)
trellis.par.set("dot.symbol", list(alpha=1, cex=0, col="black", font=1, pch=16))             # black dot (not grey) to mark the mean / cex=0 for no dot; .5 is normal
trellis.par.set("axis.line", list(alpha=1, col=panelbordercol, lty=1, lwd=panelborderwidth)) # to eliminate lattice panel border, set col="white"
trellis.par.set(superpose.line=list(alpha=1, col='black', lty=1, lwd=plotlinewidth))         # for distinguishing between Dems and Republicans
trellis.par.set(superpose.line=list(  col=plotlinecolor
                                    , lty=c(1,1), lwd=plotlinewidth))      
trellis.par.set("axis.line", list(alpha=1, col=panelbordercol, lty=1, lwd=panelborderwidth))   # make panel borders transparent
Kalman.divergence.plot <- xyplot(diff~stage | panel, data=Kalman.divergence.data, type="l", 
                   , groups=simulation # for putting the N and C symbols on the same lines, and having only three lines to the plot
                   , xlab='', ylab=''
                   , main=''
                   , ylim=panel.ylim # c(0,2)
                   , panel=Kalman.divergence.panel
                   , layout=panelLayout               # x columns, y rows
                   , as.table=TRUE                    # plot panels left to right, top to bottom
                   , between=list(y=ybetween, x=xbetween)                   
                   , strip=F
                   , scales=list(x=xaxs, y=yaxs)
                   )
print(Kalman.divergence.plot, panel.width=panelwidth, panel.height=panelheight) # more=TRUE
dev.off()
shell.exec(paste(getwd(), '/', filename.output, sep=''))       # in Unix/Linux, use shell(filename.output, wait=T)

# > date()
# [1] "Fri Jul 10 19:10:25 2009"
# > sessionInfo()
# R version 2.8.1 (2008-12-22) 
# i386-pc-mingw32 
# 
# locale:
# 		LC_COLLATE=English_United States.1252;LC_CTYPE=English_United States.1252;LC_MONETARY=English_United States.1252;LC_NUMERIC=C;LC_TIME=English_United States.1252
#
# attached base packages:
# 		[1] splines   grid      grDevices utils     datasets  graphics  stats     methods   base     
#
# other attached packages:
#     	 [1] plotrix_2.6        R2WinBUGS_2.1-14   lme4_0.999375-28   Matrix_0.999375-23 xtable_1.5-5       pscl_1.03         
#        [7] coda_0.13-4        mvtnorm_0.9-7      MASS_7.2-47        boot_1.2-36        xlsReadWrite_1.3.3 Design_2.1-2      
#       [13] survival_2.35-4    lattice_0.17-25    car_1.2-12        
#
#loaded via a namespace (and not attached):
#		 [1] arm_1.2-9      cluster_1.12.0 foreign_0.8-35 gdata_2.4.2    gtools_2.5.0-1 Hmisc_3.5-2   
